#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#  File-Name:	local-news-descriptives.R
#  Date:	March 22, 2022
#  Author: Bernhard Clemm von Hohenberg
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# SETUP  ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Start from an empty workspace: drops every object in the calling environment.
# NOTE(review): this also wipes the session of anyone who source()s this file.
rm(list = ls())

library(tidyverse)
library(htmlTable)
library(rstatix)
library(moments)
library(tidycomm)

# Machine-specific project locations. All three directories hang off the same
# project root; each path keeps its trailing slash because file names are
# appended with paste0() throughout the script.
project_root <- "/Users/bernhardclemm/Dropbox/Mac/Documents/Academia/EXPO/repositories/EXPO2.0/Projects/Local-News/"
data_path <- paste0(project_root, "Data/")
output_path <- paste0(project_root, "Output/")
code_path <- paste0(project_root, "Code/")

# Run the recoding script first.
# NOTE(review): presumably this defines final_data_allwaves, survey_trace_data
# and recode_US_w0(), which are used below but not created in this file.
source(paste0(code_path, "local-news-recoding.R"))

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# ICR  ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Inter-coder reliability of the manual domain coding.
# Read the coding sheet, keep one row per domain with a positive frequency
# count (excluding the row labelled "median non-zero"), and reshape the two
# coders' codes to long format: one row per unit (id) and coder.
coding <- read.csv(paste0(data_path, "domains/coding.csv"))

coding <- coding %>%
  filter(Frequency.count.in.our.data > 0 & domain != "median non-zero") %>%
  distinct(domain, .keep_all = TRUE) %>%
  # row_number() also works on zero-row input, where 1:nrow(.) would yield
  # c(1, 0) and make mutate() fail
  mutate(id = row_number()) %>%
  select(id, coding_jane, coding_bernhard) %>%
  pivot_longer(c(coding_jane, coding_bernhard),
               names_to = "coder")

# Reliability statistics across the two coders, including Cohen's kappa
coding %>% test_icr(id, coder, value, cohens_kappa = TRUE)

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# TRACE DATA DESCRIPTIVES ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Total number of visits of those in the final sample (text-only): 36,644,355
# Missing values are ignored wave by wave.
visits_total <-
  sum(final_data_allwaves$visits_u_w1, na.rm = TRUE) +
  sum(final_data_allwaves$visits_u_w2, na.rm = TRUE) +
  sum(final_data_allwaves$visits_u_w3, na.rm = TRUE)

# Active days (text-only): 189
# Median of the per-person sum over the three waves; persons with a missing
# value in any wave are dropped from the median.
median(final_data_allwaves$active_days_w1 +
         final_data_allwaves$active_days_w2 +
         final_data_allwaves$active_days_w3, na.rm = TRUE)

# Median number of visits (text-only): 38,265.5
median(final_data_allwaves$visits_u_w1 +
         final_data_allwaves$visits_u_w2 +
         final_data_allwaves$visits_u_w3, na.rm = TRUE)

# Average/median national/local (text and Appendix)
traces_summ <- final_data_allwaves %>%
  get_summary_stats(
    loc_w1_mean, loc_w2_mean, loc_w3_mean,
    nat_w1_mean, nat_w2_mean, nat_w3_mean) %>%
  select(variable, n, min, max, mean, median, sd)

# Total local/national news browsing compared to all browsing.
# The numerator is summed column by column with na.rm = TRUE, exactly like
# visits_total, so a single missing value no longer turns the share into NA.
news_visits_total <-
  sum(final_data_allwaves$loc_w1, na.rm = TRUE) +
  sum(final_data_allwaves$loc_w2, na.rm = TRUE) +
  sum(final_data_allwaves$loc_w3, na.rm = TRUE) +
  sum(final_data_allwaves$nat_w1, na.rm = TRUE) +
  sum(final_data_allwaves$nat_w2, na.rm = TRUE) +
  sum(final_data_allwaves$nat_w3, na.rm = TRUE)

news_prop <- news_visits_total / visits_total

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# NEWS DOMAINS DESCRIPTIVES ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# List of coded US news domains; local_national_international holds the scope
# label used to split them below.
news_domains <- read.csv(paste0(data_path, "domains/us_news_domains.csv"))

# Local domains, plus a single comma-separated string of their names.
# paste(collapse = ) replaces the former loop, which grew the string from an
# empty vector and therefore produced a leading ", " before the first domain.
# With no matching domains the string is now "" (previously NULL).
news_domains_loc <- news_domains %>%
  filter(local_national_international == "local")
news_domains_loc_string <- paste(news_domains_loc$domain, collapse = ", ")

# National domains, same treatment
news_domains_nat <- news_domains %>%
  filter(local_national_international == "national")
news_domains_nat_string <- paste(news_domains_nat$domain, collapse = ", ")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# WITHIN-PERSON VARIABILITY ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Wave-to-wave differences in the national/local news measures, per person.
# Each difference is later wave minus earlier wave.
wave_means <- select(
  final_data_allwaves,
  person_id,
  nat_w1_mean, nat_w2_mean, nat_w3_mean,
  loc_w1_mean, loc_w2_mean, loc_w3_mean
)

changes <- mutate(
  wave_means,
  nat_w1w2 = nat_w2_mean - nat_w1_mean,
  nat_w2w3 = nat_w3_mean - nat_w2_mean,
  loc_w1w2 = loc_w2_mean - loc_w1_mean,
  loc_w2w3 = loc_w3_mean - loc_w2_mean
)

# Descriptives ####

# Median level of the national measure in each wave
changes$nat_w1_mean %>% median()
changes$nat_w2_mean %>% median()
changes$nat_w3_mean %>% median()

# Median absolute within-person change between consecutive waves
changes$nat_w1w2 %>% abs() %>% median()
changes$nat_w2w3 %>% abs() %>% median()
changes$loc_w1w2 %>% abs() %>% median()
changes$loc_w2w3 %>% abs() %>% median()

# Plots ####

# Histogram (200 bins) of one within-person change score in `changes`.
#   change_var: name of the column in `changes` to plot (character scalar)
#   x_label:    x-axis title
# Returns a ggplot object.
plot_change_histogram <- function(change_var, x_label) {
  ggplot(changes, aes(x = .data[[change_var]])) +
    geom_histogram(bins = 200) +
    theme_light() +
    scale_x_continuous(name = x_label) +
    scale_y_continuous(name = "Count")
}

nat_w1w2_plot <- plot_change_histogram(
  "nat_w1w2", "Within-person change Wave 2 - Wave 1 (National news)")

nat_w2w3_plot <- plot_change_histogram(
  "nat_w2w3", "Within-person change Wave 3 - Wave 2 (National news)")

loc_w1w2_plot <- plot_change_histogram(
  "loc_w1w2", "Within-person change Wave 2 - Wave 1 (Local news)")

loc_w2w3_plot <- plot_change_histogram(
  "loc_w2w3", "Within-person change Wave 3 - Wave 2 (Local news)")

# NOTE(review): ggarrange() is not provided by any package attached in this
# file; presumably ggpubr is attached by the sourced recoding script - confirm.
changes_plot <- ggarrange(nat_w1w2_plot, nat_w2w3_plot,
                          loc_w1w2_plot, loc_w2w3_plot)

## Figure A.1 (Appendix)

# Pass the arranged figure explicitly instead of relying on last_plot(), and
# spell out `units` rather than depending on partial argument matching.
ggsave(paste0(output_path, "/figures/changes_plot.png"),
       plot = changes_plot,
       width = 25, height = 25, units = "cm")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# SAMPLE DEMOGRAPHICS  ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Population benchmark shares (in percent) shown next to the sample shares.
# Labels and shares are positionally aligned: 5 age groups, 5 education
# levels, 3 ethnicity groups, 2 sexes, then 6 party/ideology rows for which
# no benchmark is given (NA).
population_labels <- c(
  # age
  "Age: 0–17", "Age: 18–24", "Age: 25–54", "Age: 55–64", "Age: 65+",
  # education
  "Education: Less than high school",
  "Education: High school",
  "Education: Junior college",
  "Education: Bachelor",
  "Education: Graduate school",
  # ethnicity
  "Ethnicity: White", "Ethnicity: Black", "Ethnicity: Other",
  # sex
  "Sex: Male", "Sex: Female",
  # party identification and ideology
  "Party: Democrat", "Party: Independent", "Party: Republican",
  "Ideology: Left", "Ideology: Center", "Ideology: Right"
)

population_shares <- c(
  18.73, 13.27, 39.45, 12.91, 15.63,  # age
  9.92, 28.11, 27.34, 22.55, 12.07,   # education
  60.1, 12.2, 26.7,                   # ethnicity
  48.9, 51.1,                         # sex
  rep(NA, 6)                          # party and ideology: no benchmark
)

population <- data.frame(
  value = population_labels,
  population = population_shares
)

# Wave 0 survey data: two raw exports (Lucid and Qualtrics) are recoded with
# recode_US_w0() and stacked; a separate file supplies socio-demographics for
# respondents whose survey answers are missing.
missing_sociodem <- read.csv(paste0(data_path, "surveys/us_missing_w0_sociodems.csv"))
us_w0a <- read.csv(paste0(data_path, "surveys/US_survey_w0_lucid_raw.csv"))
us_w0b <- read.csv(paste0(data_path, "surveys/US_survey_w0_qualtrics_raw.csv"))
wave0a <- recode_US_w0(us_w0a)
wave0b <- recode_US_w0(us_w0b)
wave0 <- rbind(wave0a, wave0b)

# Use the estimated value only where the reported one is missing and an
# estimate exists; otherwise keep the reported value.
fill_from_estimate <- function(reported, estimated) {
  ifelse(is.na(reported) & !is.na(estimated), estimated, reported)
}

# Rename the supplementary columns so they do not clash with the survey
# columns in the join. Estimated gender is shifted down by one.
# NOTE(review): presumably this maps a 1/2 coding onto the survey's 0/1 - confirm.
estimated_sociodem <- missing_sociodem %>%
  rename(gender_estimated = gender_w0,
         age_estimated = age_w0,
         ethn_estimated = ethn_w0,
         hisp_estimated = hisp_w0,
         edu_estimated = edu_w0) %>%
  mutate(gender_estimated = gender_estimated - 1)

wave0 <- wave0 %>%
  left_join(estimated_sociodem, by = "person_id") %>%
  mutate(
    age_w0 = fill_from_estimate(age_w0, age_estimated),
    gender_w0 = fill_from_estimate(gender_w0, gender_estimated),
    ethn_w0 = fill_from_estimate(ethn_w0, ethn_estimated),
    hisp_w0 = fill_from_estimate(hisp_w0, hisp_estimated),
    edu_w0 = fill_from_estimate(edu_w0, edu_estimated)
  ) %>%
  # Labelled categories used in the demographics table; codes not listed in a
  # case_when() become NA
  mutate(
    ethn_w0_fac = as.factor(case_when(
      ethn_w0 == 1 ~ "White",
      ethn_w0 == 2 ~ "Black",
      ethn_w0 == 3 ~ "Asian",
      ethn_w0 == 4 ~ "Native American",
      ethn_w0 == 7 ~ "Prefer to self-describe"
    )),
    edu_w0_fac_5 = case_when(
      edu_w0 %in% c(1, 5, 6) ~ "Education: Less than high school",
      edu_w0 %in% c(8) ~ "Education: High school",
      edu_w0 %in% c(9, 10, 11) ~ "Education: Junior college",
      edu_w0 %in% c(12, 13) ~ "Education: Bachelor",
      edu_w0 %in% c(14, 15) ~ "Education: Graduate school"
    ),
    age_w0_cat = case_when(
      age_w0 >= 18 & age_w0 < 25 ~ "Age: 18–24",
      age_w0 >= 25 & age_w0 < 55 ~ "Age: 25–54",
      age_w0 >= 55 & age_w0 < 65 ~ "Age: 55–64",
      age_w0 >= 65 ~ "Age: 65+"
    ),
    party_w0_cat = case_when(
      party_w0 %in% c(1, 2, 3) ~ "Democrat",
      party_w0 %in% c(4) ~ "Independent",
      party_w0 %in% c(5, 6, 7) ~ "Republican"
    )
  )

# Percentage (two decimals) of rows for which `is_member` is TRUE, relative to
# `total`. NA entries in `is_member` count as non-members.
pct_of <- function(is_member, total) {
  round(sum(is_member, na.rm = TRUE) * 100 / total, 2)
}

# One-row summary of sample composition for a given set of respondents.
#   data:       data frame with gender_w0, age_w0_cat, ethn_w0_fac and
#               edu_w0_fac_5
#   wave_label: value stored in the `wave` column of the result
# Returns a one-row data frame: wave, n, then percentage shares. The column
# order is relied upon when the per-wave rows are stacked and transposed.
summarise_demographics <- function(data, wave_label) {
  other_ethnicities <- c("Asian", "Prefer to self-describe", "Native American")

  data %>%
    summarise(
      wave = wave_label,
      n = n(),
      # from here on `n` refers to the row count computed just above
      sex_male = pct_of(gender_w0 == 0, n),
      sex_female = pct_of(gender_w0 == 1, n),
      age_1 = pct_of(age_w0_cat == "Age: 18–24", n),
      age_2 = pct_of(age_w0_cat == "Age: 25–54", n),
      age_3 = pct_of(age_w0_cat == "Age: 55–64", n),
      age_4 = pct_of(age_w0_cat == "Age: 65+", n),
      ethn_white = pct_of(ethn_w0_fac == "White", n),
      ethn_other = pct_of(ethn_w0_fac %in% other_ethnicities, n),
      ethn_black = pct_of(ethn_w0_fac == "Black", n),
      edu_nohigh = pct_of(
        edu_w0_fac_5 == "Education: Less than high school", n),
      edu_highschool = pct_of(edu_w0_fac_5 == "Education: High school", n),
      edu_somecollege = pct_of(edu_w0_fac_5 == "Education: Junior college", n),
      edu_bachelor = pct_of(edu_w0_fac_5 == "Education: Bachelor", n),
      edu_highedu = pct_of(edu_w0_fac_5 == "Education: Graduate school", n)
    )
}

# Wave 0: respondents with non-missing ethnicity
descriptives_w0 <- wave0 %>%
  filter(!is.na(ethn_w0)) %>%
  summarise_demographics("wave_0")

# Waves 1-3: respondents with a response id in the respective wave
descriptives_w1 <- survey_trace_data %>%
  filter(!is.na(ResponseId_w1)) %>%
  summarise_demographics("wave_1")

descriptives_w2 <- survey_trace_data %>%
  filter(!is.na(ResponseId_w2)) %>%
  summarise_demographics("wave_2")

descriptives_w3 <- survey_trace_data %>%
  filter(!is.na(ResponseId_w3)) %>%
  summarise_demographics("wave_3")

# Final analysis sample (rows flagged `included`). The label used to be
# "wave_3", a copy-paste duplicate of the row above.
descriptives_w3_final <- survey_trace_data %>%
  filter(included == TRUE) %>%
  summarise_demographics("wave_3_final")

# Demographics table: stack the five one-row summaries, transpose so that each
# statistic is a row and each sample a column, attach readable row labels and
# join the population benchmarks on those labels.
descriptives <- as.data.frame(t(rbind(
  descriptives_w0,
  descriptives_w1,
  descriptives_w2,
  descriptives_w3,
  descriptives_w3_final))) %>%
  rename("wave_0" = V1,
         "wave_1" = V2,
         "wave_2" = V3,
         "wave_3" = V4,
         "wave_3_final" = V5) %>%
  # after transposing, rows follow the column order of the summaries
  mutate(values = colnames(descriptives_w1)) %>%
  filter(values != "wave") %>%
  # Labels must match population$value exactly for the join below.
  # Fixed: ethn_other/ethn_black and pid_rep/pid_indep previously pointed at
  # each other's labels, so the Black and Other shares were swapped in the
  # table and compared against the wrong benchmark.
  mutate(variable = case_when(
    values == "age_1" ~ "Age: 18–24",
    values == "age_2" ~ "Age: 25–54",
    values == "age_3" ~ "Age: 55–64",
    values == "age_4" ~ "Age: 65+",
    values == "sex_male" ~ "Sex: Male",
    values == "sex_female" ~ "Sex: Female",
    values == "edu_nohigh" ~ "Education: Less than high school",
    values == "edu_highschool" ~ "Education: High school",
    values == "edu_somecollege" ~ "Education: Junior college",
    values == "edu_bachelor" ~ "Education: Bachelor",
    values == "edu_highedu" ~ "Education: Graduate school",
    values == "pid_dem" ~ "Party: Democrat",
    values == "pid_indep" ~ "Party: Independent",
    values == "pid_rep" ~ "Party: Republican",
    values == "ethn_white" ~ "Ethnicity: White",
    values == "ethn_black" ~ "Ethnicity: Black",
    values == "ethn_other" ~ "Ethnicity: Other",
    values == "n" ~ "n observations")) %>%
  # the "n observations" row is kept in the table
  select(-values) %>%
  rename("value" = variable) %>%
  left_join(., population, by = "value") %>%
  select(value, population, wave_0, wave_1, wave_2, wave_3, wave_3_final)

# Table A.2 (Appendix)

htmlTable(descriptives, rnames = NULL)

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# VARIABLE DESCRIPTION  ####
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Batteries to summarize

# Item names of the multi-item batteries, generated from their stems so the
# three waves cannot drift apart. Order within each vector is fixed: it drives
# the row order of the summary table.

# Affective polarization: four out-party items per wave
ap_item_stems <- c("ft_party_opp", "mean_party_opp",
                   "selfish_party_opp", "stupid_party_opp")

ap_w1_vars <- paste0(ap_item_stems, "_w1")
ap_w2_vars <- paste0(ap_item_stems, "_w2")
ap_w3_vars <- paste0(ap_item_stems, "_w3")

# Attitude items: four topics with three items each (att_<topic>_<1..3>)
attextr_topics <- c("econ", "climate", "immigrant", "gun")
attextr_item_stems <- paste0("att_", rep(attextr_topics, each = 3), "_", 1:3)

attextr_w1_vars <- paste0(attextr_item_stems, "_w1")
attextr_w2_vars <- paste0(attextr_item_stems, "_w2")
attextr_w3_vars <- paste0(attextr_item_stems, "_w3")

# Folded versions of the same items carry a "_fold" suffix
attextr_w1_fold_vars <- paste0(attextr_w1_vars, "_fold")
attextr_w2_fold_vars <- paste0(attextr_w2_vars, "_fold")
attextr_w3_fold_vars <- paste0(attextr_w3_vars, "_fold")

# Variables to summarize

# Variables shown in the summary table, in display order. The order also
# defines the factor levels used to sort the table rows.
# The name vectors are concatenated directly: all_of() is a tidyselect helper
# meant for use inside selecting functions such as select(); wrapping plain
# vectors in it inside c() added nothing and is deprecated.
vars_summ <- c(
  # moderators
  "edu_high_w0", "white_w0",
  "int_politics_w2", "int_politics_w3",
  # participation W2
  "participation_w2", "participation_w2_bin",
  # participation W3
  "participation_w3", "participation_w3_bin",
  # Affective polarization
  ap_w1_vars, "ap_w1",
  ap_w2_vars, "ap_w2",
  ap_w3_vars, "ap_w3",
  # Attitude extremity
  attextr_w1_vars, "attextr_w1",
  attextr_w2_vars, "attextr_w2",
  attextr_w3_vars, "attextr_w3",
  # Political knowledge
  "polknow_w3"
)

# Get alpha's for batteries

# Reliability of one item battery.
#   varset: character vector of column names in final_data_allwaves
# Runs psych::alpha() on those columns and returns the second column of its
# `total` summary with names removed (still a one-cell data frame).
# NOTE(review): the second column is presumably the standardized alpha - confirm
# against the installed psych version.
extract_alpha <- function(varset) {
  battery_items <- select(final_data_allwaves, all_of(varset))
  reliability <- psych::alpha(battery_items)
  unname(reliability$total[2])
}

# Item sets per index: standardized items for affective polarization, folded
# items for attitude extremity. List names are the index names used as join
# keys in the summary table.
varsets <- list(
  ap_w1 = paste0(ap_w1_vars, "_stand"),
  ap_w2 = paste0(ap_w2_vars, "_stand"),
  ap_w3 = paste0(ap_w3_vars, "_stand"),
  attextr_w1 = attextr_w1_fold_vars,
  attextr_w2 = attextr_w2_fold_vars,
  attextr_w3 = attextr_w3_fold_vars
)

# One alpha per index. extract_alpha() returns a one-cell object, so [[1]]
# pulls out the single numeric value.
alpha_values <- vapply(
  varsets,
  function(items) extract_alpha(items)[[1]],
  numeric(1)
)

# Two-column tibble: index name and its alpha
alphas <- tibble(
  variable = names(alpha_values),
  alpha = unname(alpha_values)
)

# Summarize variables and indices

# Mean and SD of every variable in vars_summ, with the alpha attached for the
# indices (NA for everything else), sorted in the order given by vars_summ.
summ_stats <- get_summary_stats(
  select(final_data_allwaves, all_of(vars_summ)),
  type = "mean_sd"
)

summ <- summ_stats %>%
  left_join(alphas, by = "variable") %>%
  select(variable, alpha, mean, sd) %>%
  # ordered factor so that arrange() follows vars_summ, not the alphabet
  mutate(variable = factor(variable, levels = vars_summ, ordered = TRUE)) %>%
  arrange(variable)

# Codebook with question wordings; new_name holds the variable names used in
# the analysis data.
codebook <- read.csv(paste0(data_path, "surveys/codebook.csv"))

# Summary table with readable row labels.
# Labels come from the codebook (question_grid, wrapped in single quotes) unless
# a hand-written label is given below. Numbers are rounded to two decimals and
# everything is converted to text so that missing cells print as blanks.
summ_table <- summ %>%
  # strip the suffixes so names match the codebook
  mutate(variable = gsub("_stand|_fold", "", variable)) %>%
  left_join(codebook %>%
              rename(variable = new_name) %>%
              select(variable, question, question_grid),
            by = "variable") %>%
  mutate(question_grid = ifelse(
    !is.na(question_grid) & question_grid != "",
    paste0("'", question_grid, "'"), question_grid)) %>%
  mutate(question_grid = case_when(
    variable == "age_w0" ~ "Age",
    variable == "gender_w0" ~ "Gender dummy (female = 1)",
    variable == "white_w0" ~ "Ethnicity dummy (white = 1)",
    variable == "edu_high_w0" ~ "Education dummy (high = 1)",
    variable == "int_politics_w2" ~ "'How interested are you in the following topics? Politics' W2",
    variable == "int_politics_w3" ~ "'How interested would you say you are in politics?' W3",
    variable == "polknow_w3" ~ "Political knowledge W3 (summative index 0 - 4)",
    variable == "participation_w2" ~ "Intended participation W2 (summative index 0-9)",
    variable == "participation_w2_bin" ~ "Intended participation W2 (dummy, median cutoff)",
    variable == "participation_w3" ~ "Intended participation W3 (summative index 0-9)",
    # fixed: closing parenthesis was missing
    variable == "participation_w3_bin" ~ "Intended participation W3 (dummy, median cutoff)",
    variable == "ft_party_opp_w1" ~ "'Please rate how you feel about the following groups and individuals: [out-party members]' W1 (0 - 100)",
    variable == "ft_party_opp_w2" ~ "'Please rate how you feel about the following groups and individuals: [out-party members]' W2 (0 - 100)",
    variable == "ft_party_opp_w3" ~ "'Please rate how you feel about the following groups and individuals: [out-party members]' W3 (0 - 100)",
    # trait items: question stem + grid label + wave
    variable %in% c("mean_party_opp_w1", "selfish_party_opp_w1",
                    "stupid_party_opp_w1", "socdis_party_opp_w1") ~
      paste(question, question_grid, "W1", sep = " "),
    variable %in% c("mean_party_opp_w2", "selfish_party_opp_w2",
                    "stupid_party_opp_w2", "socdis_party_opp_w2") ~
      paste(question, question_grid, "W2", sep = " "),
    variable %in% c("mean_party_opp_w3", "selfish_party_opp_w3",
                    "stupid_party_opp_w3", "socdis_party_opp_w3") ~
      paste(question, question_grid, "W3", sep = " "),
    variable == "ap_w1" ~ "Affective polarization W1 (standardized average index)",
    variable == "ap_w2" ~ "Affective polarization W2 (standardized average index)",
    variable == "ap_w3" ~ "Affective polarization W3 (standardized average index)",
    variable == "attextr_w1" ~ "Attitude extremity W1 (standardized average index of folded items)",
    variable == "attextr_w2" ~ "Attitude extremity W2 (standardized average index of folded items)",
    # fixed: stray double space before "W3"
    variable == "attextr_w3" ~ "Attitude extremity W3 (standardized average index of folded items)",
    TRUE ~ as.character(question_grid)
  )) %>%
  # replace the survey-software placeholders for the out-party name
  mutate(question_grid = gsub(
    "\\[Field-op_party_nameW2\\]s|\\[Field-op_party_nameW3\\]s",
    "[out-party members]", question_grid)) %>%
  # NOTE(review): this rewrites every colon, including the one in the
  # hand-written feeling-thermometer labels above ("... individuals: [...]").
  # Presumably only "a:b" grid labels were meant - confirm.
  mutate(question_grid = gsub(":", " ' vs. ' ", question_grid)) %>%
  # lambda form: passing extra arguments through across() is deprecated
  mutate(across(where(is.numeric), ~ round(.x, 2))) %>%
  mutate(across(everything(), as.character)) %>%
  mutate(across(everything(), ~ ifelse(is.na(.x), "", .x))) %>%
  rename("var" = question_grid) %>%
  select(var, alpha, mean, sd)

# Table A.3 (Appendix)

htmlTable(summ_table, rnames = NULL)
